1 Figure 2

In this sub-vignette we present the analysis and source code for Figure 2. This sub-vignette can be built along with all others, to generate all figures, by running CLLCytokineScreen2021.Rmd.

1.1 Set up

Load libraries

library(RColorBrewer)
library(ggrepel)
library(patchwork)
library(ggplot2)
library(tidyr)
library(ggbeeswarm)
library(magrittr)
library(pheatmap)
library(ggfortify)
library(gridExtra)
library(DESeq2)
library(survival)
library(survminer)
library(glmnet)
library(ConsensusClusterPlus)
library(clusterProfiler)
library(org.Hs.eg.db)
library(msigdbr)
library(dplyr)
library(tidyverse)

Set plotting directory

plotDir = ifelse(exists(".standalone"), "", "../../inst/figs/")
if(plotDir!="") if(!file.exists(plotDir)) dir.create(plotDir)
## Warning in dir.create(plotDir): '..\..\inst\figs' existiert bereits

1.2 Load data

Raw

#df: tibble containing all screening viability data
load( "../../data/df.RData")

#patMeta: tibble containing all patient genetic data
load( "../../data/patMeta.RData")

#LDT: tibble containing Lymhpocyte DOubling Times for patients in screen
load( "../../data/LDT.RData")

#survT: tibble containing disease progression data for patients in screen 
load( "../../data/survT.RData")

#dds_smp: DESeqDataSet containing RNA counts and assosciated meta data for matched samples to those in screen 
load( "../../data/dds_smp.RData")
#From tsvs
#screening data
df <- read.table(file= "../../inst/extdata/df.txt",header = TRUE) %>% as_tibble()

#Patient meta data
patMeta <- read.table(file= "../../inst/extdata/patMeta.txt",header = TRUE) %>% as_tibble()

#Lymphocyte Doubling Times
LDT <- read.table(file= "../../inst/extdata/LDT.txt",header = TRUE) %>% as_tibble()

#Survival times
survT <- read.table(file= "../../inst/extdata/survT.txt",header = TRUE) %>% as_tibble()

#RNA data
rna_countsMatrix <- read.table(file=  "../../inst/extdata/rna_countsMatrix.txt", header = TRUE) 
coldata_rna <- read.table(file="../../inst/extdata/coldata_rna.txt", header= TRUE)
rowdata_rna <- read.table(file=  "../../inst/extdata/rowdata_rna.txt", header= TRUE) 


#Arrange data for analysis
#screening data
df$Cytokine <- factor(df$Cytokine, levels= c("No Cytokine",
                                              "Resiquimod", 
                                              "IL-4", 
                                              "TGF-b1",
                                              "IL-1b",
                                              "Interferon gamma",
                                              "SDF-1a",
                                              "sCD40L" ,
                                              "sCD40L+IL-4",
                                              "soluble anti-IgM", 
                                              "CpG ODN",
                                              "IL-6",
                                              "IL-10",
                                              "IL-21",
                                              "HS-5 CM", 
                                              "IL-15",
                                              "BAFF",
                                              "IL-2" ))


#patMeta
patMeta[sapply(patMeta, is.character)] <- lapply(patMeta[sapply(patMeta, is.character)], as.factor)
patMeta[sapply(patMeta, is.integer)] <- lapply(patMeta[sapply(patMeta, is.integer)], as.factor)
patMeta$PatientID <- as.character(patMeta$PatientID)

#RNA
#assemble deseq object
rownames(coldata_rna) <- coldata_rna$PatientID
dds_smp <- DESeqDataSetFromMatrix(rna_countsMatrix, coldata_rna, design =~1)
rowData(dds_smp) <- rowdata_rna

Process data

# Subset data to Cytokine only treatments, no drugs 

df_full<-df

df <- dplyr::filter(df, Drug == "DMSO", Cytokine != "No Cytokine")

1.3 Define Aesthetics

source("../../R/themes_colors.R")

1.4 Define additional functions

deckel function: to set limits of scaling factor. Accepts a number x, and two numeric limits, lower and upper.

deckel <- function(x, lower = -Inf, upper = +Inf) ifelse(x<lower, lower, ifelse(x>upper, upper, x))

medianCenter_MadScale function: to scale viability values according to MAD and then center values at zero. Maximum/minimum size of scaling factor set with deckel (above). Accepts a vector x to scale.

medianCenter_MadScale <- function(x) {
  s=0
  (x - s) / deckel(mad(x, center = s), lower = 0.05, upper = 0.2)
}

scaleCytResp function: to apply medianCenter_MadScale row wise to viability matrix. Accepts x, a matrix of log(viability) values.

scaleCytResp  <- function(x) t(apply(x, 1, medianCenter_MadScale)) 

Hierarchical clustering function: To provide cluster function for running ConsensusClusterPlus. Accepts this_dist, a dissimilarity structure as produced by dist and k, the number of clusters to assign.

myclusterfunction = function(this_dist, k){
    #run hierarchical cluster analysis on dissimilarity structure this_dist  
    hc = hclust(this_dist)
    #cut cut tree into k groups 
    assignment = cutree(hc, k)
    return(assignment)
}

Euclidean Distance function: To provide distance function for running ConsensusClusterPlus. Calculates Euclidean distances for matrix x.

myDistFunc = function(x){ dist(x, method="euclidean")}

Factor2Ind: To generate indicator matrix from a factor. Given a factor x, create an indicator matrix of dimension length(x) multiplied by nlevels(x)-1, dropping the column corresponding to the baseline level (by default the first level is used as baseline).

factor2ind <- function(x, baseline)
{

  xname <- deparse(substitute(x))
  n <- length(x)
  x <- as.factor(x)
  if(!missing(baseline)) x <- relevel(x, baseline)
  X <- matrix(0, n, length(levels(x)))
  X[(1:n) + n*(unclass(x)-1)] <- 1
  X[is.na(x),] <- NA
  dimnames(X) <- list(names(x), paste(xname, levels(x), sep = ":"))
  return(X[,-1,drop=FALSE])
}

Function for multinomial regression: To perform multinomial regression to identify genetic features that are predictors of cluster assignment. Provide a feature matrix X and a response matrix y, and specify method (regression method), repeats (number of repeats of the regression) and folds (number of folds to split the data into for cross validation).

runGlm.multi <- 
  function(X, y, method = "ridge", repeats=20, folds = 3) {
  modelList <- list()
  lambdaList <- c()
  
  coefMat <- 
    lapply(unique(y), function(n) {
    mat <- matrix(NA, ncol(X), repeats)
    rownames(mat) <- colnames(X)
    mat
  })
  
  names(coefMat) <- unique(y)
  
  alpha = switch(method, lasso = 1, ridge = 0, stop("Please provide a valid method: lasso or ridge"))

  
  for (i in seq(repeats)) {

      #balanced sampling
      vecFold <- mltools::folds(y, nfolds = folds, stratified = TRUE, seed = i*1996)
      res <- cv.glmnet(X, y, type.measure = "class",
                       foldid = vecFold, alpha = alpha, standardize = FALSE,
                       intercept = TRUE, family = "multinomial")
      lambdaList <- c(lambdaList, res$lambda.min)
      modelList[[i]] <- res
      
      coefModel <- coef(res, s = "lambda.min")
      for (n in names(coefModel)) {
        coefMat[[n]][,i] <- coefModel[[n]][-1]
      }
  }
  list(modelList = modelList, lambdaList = lambdaList, coefMat = coefMat)
  }

Sum coefficients: to drop all features from multinomial regression that don’t meet specified cut off criteria, and gather coefficients for all remaining other features, for all repeats of the regression. Accepts a matrix coefMat, plus numeric values for coefCut, the minimum value of that the average coefficient should be, and freqCut, the minimum proportion of repeats that a coefficient should be significant.

sumCoef <- function(coefMat, coefCut = 0, freqCut =1) {
  meanCoef <- rowMeans(abs(coefMat))
  freqCoef <- rowMeans(coefMat != 0)
  subMat <- coefMat[meanCoef > coefCut & freqCoef >= freqCut,,drop=FALSE]
  eachTab <- data.frame(subMat) %>%
     rownames_to_column("feature") %>% gather(key = "rep",value = "coef",-feature) %>%
     mutate(rep = gsub("X","",rep))
  return(eachTab)
} 

1.5 Plot Figures

1.5.1 Figure 2A

Heatmap of all scaled log(viability) values, for all patient samples and stimuli.
In the code below, the viability data is normalised to DMSO controls and each row is scaled according to MAD. Limits are applied to scaling factor for optimal visualisation. The heatmap is plotted using pheatmap: the ordering of the columns (patient samples) is obtained from the dendrogram that results from running ConsensusClusterPlus using hierarchical clustering with the Euclidean metric. The rows are globally ordered using the dendrogram order produced by hclust with default branch arrangement.

########### Viability matrix ################
#make viability matrix for cytokine treatments for patients
viab.mat <- dplyr::select(df, 
                          PatientID, 
                          Log, 
                          Cytokine) %>% 
            tidyr::spread(Cytokine, Log) %>%
  as.data.frame()

#make PatientID the row names
rownames(viab.mat) <- unlist(viab.mat[,1]) # the first row will be the header
viab.mat <- dplyr::select(viab.mat,-PatientID) 

#Transform matrix
viab.mat <- t(viab.mat)

#keep unscaled matrix 
viab.mat.unscaled <- viab.mat

#run scaleCytResp on viability matrix
viab.mat <- scaleCytResp(viab.mat)

#apply deckel function to matrix
#Calculate dendrogram 
breaks <- c(seq(-3, 3, length.out = 101)) 

viab.mat.lims <- deckel(viab.mat,
                            lower = dplyr::first(breaks),
                            upper = dplyr::last(breaks))

Run consensus clustering

results = ConsensusClusterPlus(d = viab.mat.lims, 
                               maxK = 7, 
                               reps = 10000, 
                               pItem = 0.8, 
                               pFeature = 1,  
                               clusterAlg = "myclusterfunction",
                               distance = "myDistFunc", 
                               plot = NULL, 
                               seed = 1386, 
                               finalLinkage = "complete", 
                               innerLinkage = "complete")
## end fraction
## clustered

## clustered

## clustered

## clustered

## clustered

## clustered

Get cluster annotations

#Extract consensus clusters for k = 4
clusters <- 
  results[[4]][["consensusClass"]] %>%
  tibble::enframe() %>% 
  dplyr::rename(PatientID="name", Cluster="value") 

#get table of clusters
clusters <- 
  clusters %>% 
  dplyr::select(PatientID, Cluster)

#get matrix
cluster_matrix <- as.dist( 1 - results[[4]]$ml )

#add clusters to patMeta
patMeta_cl <- left_join(clusters, patMeta, by = "PatientID")

Arrange annotations for heatmap

#Sort heatmap annotations
#Select annotations from patMeta_cl
Heatmap_Annotation <- dplyr::select(patMeta_cl, 
                                    PatientID, 
                                    IGHV.status, 
                                    Methylation_Cluster,
                                    trisomy12,
                                    TP53,
                                    ATM,
                                    treatment, 
                                    gender, 
                                    Cluster) %>%
  
  #Adjust names/levels for legend
  mutate(treatment=case_when(treatment==0~"No", 
                             treatment==1~"Yes")) %>%

  mutate(gender=case_when(gender=="f"~"Female", 
                          gender=="m"~"Male")) %>%
  
  mutate(IGHV.status=case_when(IGHV.status=="U"~"Unmutated", 
                               IGHV.status=="M"~"Mutated"))%>%
  
  mutate(Methylation_Cluster=case_when(Methylation_Cluster=="LP"~"Low", 
                               Methylation_Cluster=="IP"~"Intermediate", 
                               Methylation_Cluster=="HP"~"High"))


#Rename columns for legend
colnames(Heatmap_Annotation) <- c("PatientID", 
                                  "IGHV status",
                                  "Methylation Cluster",
                                  "trisomy 12", 
                                  "TP53", 
                                  "ATM",
                                  "Pretreated", 
                                  "Sex", 
                                  "Cluster")

#make Pat IDs the rownames
Heatmap_Annotation <- as.data.frame(Heatmap_Annotation)
rownames(Heatmap_Annotation) <- unlist(Heatmap_Annotation$PatientID)

#Tidy Up Heatmap Annotation table
Heatmap_Annotation$Cluster <- as.factor(Heatmap_Annotation$Cluster)

Heatmap_Annotation <- dplyr::select(Heatmap_Annotation,-"PatientID")

Define aesthetics for heatmap and annotations

########### Set Heatmap Aesthetics ###############
#Generate red-blue divergent palette
breaks <- c(seq(-3, 3, length.out = 101)) %>% `names<-`(
     colorRampPalette(c(palblues, 
                       "white",  "white", "white",
                         palreds))(101))

#Specify colors of annotations 
 ann_colors = 
   list(
    "IGHV status" = c("Unmutated"=IGHV[1],
                      "Mutated"=IGHV[2]),
    "Methylation Cluster"= c("Low"=Methylation_cluster[1],
                             "Intermediate"=Methylation_cluster[2],
                             "High"=Methylation_cluster[3]),
    "Sex" = c("Female"=Sex[1],
              "Male"=Sex[2]),
    "Pretreated" = c("No"=Mutant[1],
                  "Yes"=Mutant[2]),
    "trisomy 12" = c("1"=Mutant[2],
                     "0"=Mutant[1]),
    "ATM" = c("1"=Mutant[2],
              "0"=Mutant[1]),
    "TP53" = c("1"=Mutant[2],
               "0"=Mutant[1]),
    "Cluster" = c("1"=colors[1],
                  "2"=colors[2],
                  "3"=colors[3],
                  "4"=colors[4]))

Plot Figure

rownames(viab.mat.lims)<-rownames(viab.mat.lims) %>% 
  gsub("IL-1b","IL1\u03B2",.)%>% 
  gsub("SDF-1a","SDF-1\u03B1",.)%>% 
  gsub("Interferon gamma","Interferon \u03B3",.)%>% 
  gsub("TGF-b1","TGF-\u03B21",.)


#Plot heatmap
Fig2A <- 
pheatmap(viab.mat.lims,  
         scale = "none",
         clustering_method = "complete", 
         cluster_cols = TRUE,
         show_colnames = FALSE,
         cutree_cols = 4,
         clustering_distance_cols = cluster_matrix,
         annotation_col = Heatmap_Annotation,
         annotation_colors = ann_colors,
         cellheight = 22,
         cellwidth = 4,
         breaks = breaks,
         color= names(breaks),
         border_color=NA,
         fontsize=10,
         fontsize_row=16,
         legend_breaks = c(-3,0,3) ) 
        

Fig2A

1.5.2 Figure 2B

Lymphoctye doubling times (LDT) stratified by patient clusters

#plot LDT stratified by Patient Clusters
Fig2B <-
  patMeta_cl %>%
  #join patient meta data with clusters, to LDT dataframe
  left_join(  dplyr::select(LDT, PatientID, doubling.time), by=c("PatientID"="PatientID")) %>%
  dplyr::filter(!is.na(doubling.time)) %>%
  dplyr::mutate(Cluster=as.factor(Cluster)) %>%
  
  ggplot(aes(x=Cluster, y=doubling.time,  group=Cluster, fill=Cluster)) +
  geom_boxplot() +
  geom_beeswarm() +
  scale_fill_manual(values=colors[1:4]) +
  scale_y_log10() +
  guides(fill="none") +
  ylab("LDT in years") +
  t2 +
  stat_compare_means(method="t.test", comparison=list(c(1,2),c(3,4)), size=6, label.x = 1.3, label.y=2.1)


Fig2B

1.5.3 Figure 2C

Here we plot KM curves to show TTT stratified by cluster. Clinical follow-up information to calculate OS was available for all 192 patients. For 188 patients treatment information after sample collection was available.

######Arrange data######
clusters.surv <- left_join(clusters, survT, by = "PatientID")

#Add meta data to heatmap clusters
clusters.surv <- left_join(clusters.surv, patMeta, by="PatientID")

Univariate Cox Proportional Hazards
Cluster 1 as reference

coxph_summ <- clusters.surv%>%
  mutate(Cluster=factor(Cluster))%>%
    coxph(Surv( TTT, treatedAfter) ~  factor2ind(Cluster, 1),  data=.) %>% 
    summary()
coxph_summ$coefficients %>% round(5)
##                                     coef exp(coef) se(coef)        z Pr(>|z|)
## factor2ind(Cluster, 1)Cluster:2  0.47981   1.61577  0.28715  1.67093  0.09474
## factor2ind(Cluster, 1)Cluster:3 -0.24052   0.78622  0.27089 -0.88786  0.37461
## factor2ind(Cluster, 1)Cluster:4 -1.29254   0.27457  0.33893 -3.81361  0.00014
C1vsC2_p <- coxph_summ$coefficients[1,5] %>% round(3)
C1vsC2_p
## [1] 0.095

Cluster 3 as reference

coxph_summ <- clusters.surv%>%
  mutate(Cluster=factor(Cluster))%>%
    coxph(Surv( TTT, treatedAfter) ~  factor2ind(Cluster, 3),  data=.) %>% 
  summary()
coxph_summ$coefficients %>% round(5)
##                                     coef exp(coef) se(coef)        z Pr(>|z|)
## factor2ind(Cluster, 3)Cluster:1  0.24052   1.27191  0.27089  0.88786  0.37461
## factor2ind(Cluster, 3)Cluster:2  0.72033   2.05510  0.33097  2.17639  0.02953
## factor2ind(Cluster, 3)Cluster:4 -1.05202   0.34923  0.37578 -2.79957  0.00512

Cluster 3 vs C1+2

coxph_summ <- clusters.surv%>%
  filter(Cluster%in%c(1,2,3)) %>% 
  dplyr::mutate(Cluster=factor(Cluster), Cluster_12vs3=ifelse(Cluster%in%c(1,2),1,0))%>%
    coxph(Surv( TTT, treatedAfter) ~  factor2ind(Cluster_12vs3, 1),  data=.) %>% 
  summary()
coxph_summ$coefficients %>% round(5)
##                                 coef exp(coef) se(coef)       z Pr(>|z|)
## factor2ind(Cluster_12vs3, 1) 0.35273   1.42295  0.25861 1.36394  0.17259

Cluster 4 as reference

coxph_summ <- clusters.surv%>%
  mutate(Cluster=factor(Cluster))%>%
    coxph(Surv( TTT, treatedAfter) ~  factor2ind(Cluster, 4),  data=.) %>% 
  summary()
coxph_summ$coefficients %>% round(5)
##                                    coef exp(coef) se(coef)       z Pr(>|z|)
## factor2ind(Cluster, 4)Cluster:1 1.29254   3.64203  0.33893 3.81361  0.00014
## factor2ind(Cluster, 4)Cluster:2 1.77235   5.88467  0.38930 4.55263  0.00001
## factor2ind(Cluster, 4)Cluster:3 1.05202   2.86344  0.37578 2.79957  0.00512
C3vsC4_p<-coxph_summ$coefficients[3,5] %>% round(3)
C3vsC4_p
## [1] 0.005

Multivariate Cox Proportional Hazards
Cluster 1 as reference

coxph_summ <- clusters.surv%>%
  mutate(Cluster=factor(Cluster))%>%
    coxph(Surv( TTT, treatedAfter) ~  factor2ind(Cluster, 1)+IGHV.status+trisomy12+TP53,  data=.) %>% 
  summary()
coxph_summ$coefficients
##                                        coef exp(coef)  se(coef)          z
## factor2ind(Cluster, 1)Cluster:2  0.55573674 1.7432248 0.3164764  1.7560134
## factor2ind(Cluster, 1)Cluster:3  0.03979159 1.0405939 0.2981304  0.1334704
## factor2ind(Cluster, 1)Cluster:4 -0.78031774 0.4582604 0.3983280 -1.9589831
## IGHV.statusU                     0.55191581 1.7365768 0.2725341  2.0251255
## trisomy121                      -0.13357407 0.8749627 0.3561725 -0.3750263
## TP531                            1.38977496 4.0139467 0.2607176  5.3305760
##                                     Pr(>|z|)
## factor2ind(Cluster, 1)Cluster:2 7.908611e-02
## factor2ind(Cluster, 1)Cluster:3 8.938214e-01
## factor2ind(Cluster, 1)Cluster:4 5.011477e-02
## IGHV.statusU                    4.285447e-02
## trisomy121                      7.076409e-01
## TP531                           9.790173e-08
C1vsC2_p_mv<-coxph_summ$coefficients[1,5] %>% round(3)
C1vsC2_p_mv
## [1] 0.079

Cluster 4 as reference

coxph_summ <- clusters.surv%>%
  mutate(Cluster=factor(Cluster))%>%
    coxph(Surv( TTT, treatedAfter) ~  factor2ind(Cluster, 4)+IGHV.status+trisomy12+TP53,  data=.) %>%
  summary()
coxph_summ$coefficients
##                                       coef exp(coef)  se(coef)          z
## factor2ind(Cluster, 4)Cluster:1  0.7803177 2.1821655 0.3983280  1.9589831
## factor2ind(Cluster, 4)Cluster:2  1.3360545 3.8040051 0.4618269  2.8929770
## factor2ind(Cluster, 4)Cluster:3  0.8201093 2.2707481 0.3975967  2.0626662
## IGHV.statusU                     0.5519158 1.7365768 0.2725341  2.0251255
## trisomy121                      -0.1335741 0.8749627 0.3561725 -0.3750263
## TP531                            1.3897750 4.0139467 0.2607176  5.3305760
##                                     Pr(>|z|)
## factor2ind(Cluster, 4)Cluster:1 5.011477e-02
## factor2ind(Cluster, 4)Cluster:2 3.816093e-03
## factor2ind(Cluster, 4)Cluster:3 3.914435e-02
## IGHV.statusU                    4.285447e-02
## trisomy121                      7.076409e-01
## TP531                           9.790173e-08
C3vsC4_p_mv<-coxph_summ$coefficients[3,5] %>% round(3)
C3vsC4_p_mv
## [1] 0.039

Plot figure

##Run survfit on TTT
fit <- survfit(Surv(TTT, treatedAfter)~Cluster, clusters.surv)


t_plot<-  t2+theme(plot.title=element_blank(), axis.title.x=element_blank(), legend.key = element_blank())
t_table<- t2+theme(plot.title=element_blank(), panel.grid.major.x = element_blank())

Fig2_KM_surv <- 
  ggsurvplot(fit,
             surv.median.line = "hv", # Add medians survival
             legend.title = "Cluster",# Change legends: title & labels
             legend.labs = c("1", "2","3","4"),
             pval = FALSE,
             risk.table = TRUE,
             palette = c(colors[1], colors[2], colors[3], colors[4]),
             ggtheme = t_plot,
             tables.theme = t_table,
             legend = "bottom",
             xlab="Time in years",
             ylab="Time to next treatment (probability)",
             break.time.by=1, 
             xlim=c(0,6.8))

Fig2_KM_surv_plot <-
  Fig2_KM_surv$plot +
  annotate(geom="text", x=6.2, y=0.5,    label=C3vsC4_p, color="black", size=6) +
  geom_segment(x = 5.7, xend = 5.7, y = 0.72, yend = 0.28, color="black") +
  geom_segment(x = 5.6, xend = 5.7, y = 0.72, yend = 0.72, color="black") +
  geom_segment(x = 5.6, xend = 5.7, y = 0.281, yend = 0.281, color="black") +
  annotate(geom="text", x=6.8, y=0.23, label=C1vsC2_p, color="black", size=6) +
  geom_segment(x = 6.3, xend = 6.3, y = 0.33, yend = 0.13, color="black") +
  geom_segment(x = 6.2, xend = 6.3, y = 0.33, yend = 0.33, color="black") +
  geom_segment(x = 6.2, xend = 6.3, y = 0.131, yend = 0.131, color="black")


Fig2C <- wrap_elements(Fig2_KM_surv_plot + Fig2_KM_surv$table + plot_layout(ncol=1, heights = c(0.8,0.2)))

Fig2C

1.5.4 Figure 2D

Genetic predictors of cluster assignment.
Here we use a multinomial linear model with L1-penalty, implemented in the cvglmnet package. As the dependent variable, the cluster assignment for each patient was used. As input to the model, genetic features with more than 20% missing values were excluded, and only patients with complete annotation are included in the model (n=137). As predictors, the genetic mutations and CNVs (p=39) and IGHV status (coded as 0-1) are used, using 3-fold cross-validation. Misclassification error is used as loss for cross- validation.
Prepare feature matrix

#Generate Matrix
#select features from patient meta file
geneMatrix <- 
  dplyr::select(patMeta,
                -c(gender:treatment)) %>%
  
  #adjust IGHV.status levels  U and M to numeric 1 and 0 
  mutate(IGHV = ifelse(is.na(IGHV.status), NA,
                       ifelse(IGHV.status == "M", 1, 0)), 
         #remove old columns and remove Methylation Cluster
         IGHV.status=NULL, Methylation_Cluster=NULL ) %>%
  
  #convert factors to numeric
  mutate_if(is.factor, as.character) %>%
  mutate_at(vars(-PatientID), as.numeric) %>%

  #convert to matrix format, with patient IDs as rownames
  data.frame() %>% 
  column_to_rownames("PatientID") %>% 
  as.matrix()


#Tidy matrix for use in glmnet function

#Remove genes with higher than 20% missing values
geneMatrix <- geneMatrix[,colSums(is.na(geneMatrix))/nrow(geneMatrix) <= 0.2]

#Filter for patients with complete data
geneMatrix.complete <- geneMatrix[complete.cases(geneMatrix),]


#Combine KRAS, NRAS and BRAF mutations into a single column
#set up empty matrix
Ras_Raf <- matrix(NA, 
                  nrow = nrow(geneMatrix.complete), 
                  ncol = 1)

colnames(Ras_Raf) <- "RAS/RAF"

#add RAS/RAF column to matrix
geneMatrix.complete <- cbind(geneMatrix.complete, Ras_Raf)

#Annotate RAS_RAF where where any of KRAS, NRAS or BRAF are mutated
geneMatrix.complete[,"RAS/RAF"] <- ifelse(geneMatrix.complete[,"KRAS"]==1,1,
                                                ifelse(geneMatrix.complete[,"BRAF"]==1,1,
                                                  ifelse(geneMatrix.complete[,"NRAS"]==1, 1, 0)))


#remove KRAS, NRAS and BRAF columns
geneMatrix.complete <- 
  geneMatrix.complete[, colnames(geneMatrix.complete) != "KRAS"]

geneMatrix.complete <- 
  geneMatrix.complete[, colnames(geneMatrix.complete) != "BRAF"]

geneMatrix.complete <- 
  geneMatrix.complete[, colnames(geneMatrix.complete) != "NRAS"]

Prepare response matrix

#get Clusters and Patients
y <- 
  dplyr::select(patMeta_cl, PatientID, Cluster) %>% 
  column_to_rownames("PatientID")

#Cluster column as.numeric
y$Cluster <- as.numeric(as.character(y$Cluster))

#transform matrix
y <- t(y)

#Match response matrix and feature matrix
y <- y[,rownames(geneMatrix.complete)]

Run multinomial regression

X <- geneMatrix.complete
y <- y

res <- runGlm.multi(X, y, method = "lasso", repeats = 50, folds = 3)

Prepare table of coefficients for plot

coefList <- res$coefMat

#Filter for features whose coefficients meet the frequency and minimum value thresholds
coefTab <- lapply(names(coefList), function(d) {
   coefMat <- coefList[[d]]
   coefTab <- sumCoef(coefMat=coefMat, freqCut = 0.6, coefCut = 0.35) %>%
     mutate(cluster = d)
}) %>% bind_rows()

1.5.5 Figure 2D

coefTab %>% 
  group_by(feature) %>% 
  summarize() %>% 
  unlist()
##    feature1    feature2    feature3    feature4    feature5    feature6 
##       "ATM"    "gain8q"      "IGHV"      "POT1"   "RAS/RAF"     "SF3B1" 
##    feature7    feature8 
##      "TP53" "trisomy12"
#Annotate RAS_RAF where where any of KRAS, NRAS or BRAF are mutated
patMeta_cl[,"RAS_RAF"] <- ifelse(patMeta_cl[,"KRAS"]==1,1,
                                                ifelse(patMeta_cl[,"BRAF"]==1,1,
                                                  ifelse(patMeta_cl[,"NRAS"]==1, 1, 0)))
patMeta_cl %<>% mutate(Cluster=as.factor(Cluster),
                       RAS_RAF=as.factor(RAS_RAF))


Coef_Perc_Table<-  patMeta_cl %>%
  filter(!is.na(IGHV.status)) %>% 
  group_by(Cluster, IGHV.status) %>%  
  count() %>% 
  group_by(Cluster) %>%
  mutate(freq = n / sum(n)) %>% 
  mutate(Percentage=round(freq*100),0) %>% 
  filter(IGHV.status=="M") %>% 
  rename(IGHV.mutated=Percentage) %>% 
  select(Cluster, IGHV.mutated)
  
Coef_Perc_Table<-  patMeta_cl %>%
  filter(!is.na(trisomy12)) %>% 
  group_by(Cluster, trisomy12) %>%  
  count() %>% 
  group_by(Cluster) %>%
  mutate(freq = n / sum(n)) %>% 
  mutate(Percentage=round(freq*100),0) %>% 
  filter(trisomy12=="1") %>% 
  rename(trisomy12_positive=Percentage) %>% 
  select(Cluster, trisomy12_positive) %>% 
  left_join(Coef_Perc_Table, ., by = "Cluster")

Coef_Perc_Table<-  patMeta_cl %>%
  filter(!is.na(SF3B1)) %>% 
  group_by(Cluster, SF3B1) %>%  
  count() %>% 
  group_by(Cluster) %>%
  mutate(freq = n / sum(n)) %>% 
  mutate(Percentage=round(freq*100),0) %>% 
  filter(SF3B1=="1") %>% 
  rename(SF3B1_mut=Percentage) %>% 
  select(Cluster, SF3B1_mut)%>% 
  left_join(Coef_Perc_Table, ., by = "Cluster")

Coef_Perc_Table<-  patMeta_cl %>%
  filter(!is.na(POT1)) %>% 
  group_by(Cluster, POT1, .drop=FALSE) %>%  
  count(.drop=FALSE) %>% 
  group_by(Cluster) %>%
  mutate(freq = n / sum(n)) %>% 
  mutate(Percentage=round(freq*100),0) %>% 
  filter(POT1=="1") %>% 
  rename(POT1_mut=Percentage) %>% 
  select(Cluster, POT1_mut)%>% 
  left_join(Coef_Perc_Table, ., by = "Cluster")

Coef_Perc_Table<-  patMeta_cl %>%
  filter(!is.na(ATM)) %>% 
  group_by(Cluster, ATM) %>%  
  count() %>% 
  group_by(Cluster) %>%
  mutate(freq = n / sum(n)) %>% 
  mutate(Percentage=round(freq*100),0) %>% 
  filter(ATM=="1") %>% 
  rename(ATM_mut=Percentage) %>% 
  select(Cluster, ATM_mut)%>% 
  left_join(Coef_Perc_Table, ., by = "Cluster")

Coef_Perc_Table<-  patMeta_cl %>%
  filter(!is.na(TP53)) %>% 
  group_by(Cluster, TP53) %>%  
  count() %>% 
  group_by(Cluster) %>%
  mutate(freq = n / sum(n)) %>% 
  mutate(Percentage=round(freq*100),0) %>% 
  filter(TP53=="1") %>% 
  rename(TP53_mut=Percentage) %>% 
  select(Cluster, TP53_mut)%>% 
  left_join(Coef_Perc_Table, ., by = "Cluster")

Coef_Perc_Table<-  patMeta_cl %>%
  filter(!is.na(RAS_RAF)) %>% 
  group_by(Cluster, RAS_RAF, .drop=FALSE) %>%  
  count(.drop=FALSE) %>% 
  group_by(Cluster) %>%
  mutate(freq = n / sum(n)) %>% 
  mutate(Percentage=round(freq*100),0) %>% 
  filter(RAS_RAF=="1") %>% 
  rename(RAS_RAF_mut=Percentage) %>% 
  select(Cluster, RAS_RAF_mut)%>% 
  left_join(Coef_Perc_Table, ., by = "Cluster")

Coef_Perc_Table<-  patMeta_cl %>%
  filter(!is.na(gain8q)) %>% 
  group_by(Cluster, gain8q, .drop=FALSE) %>% 
  count(.drop = FALSE) %>% 
  group_by(Cluster) %>%
  mutate(freq = n / sum(n)) %>% 
  mutate(Percentage=round(freq*100),0) %>% 
  filter(gain8q=="1") %>% 
  rename(gain8q_positive=Percentage) %>% 
  select(Cluster, gain8q_positive)%>% 
  left_join(Coef_Perc_Table, ., by = "Cluster")

# Coef_Perc_Table
plotTab <-
  coefTab %>%
  dplyr::group_by(feature, cluster) %>% 
  dplyr::summarise(meanCoef = mean(coef),
                   sdCoef = sd(coef)) %>%
  dplyr::arrange(desc(meanCoef)) %>% 
  ungroup()
## `summarise()` has grouped output by 'feature'. You can override using the
## `.groups` argument.
#update labelling 
plotTab <- plotTab %>% mutate(feature = ifelse(feature == "IGHV", "IGHV status", 
                                   ifelse(feature == "trisomy12", "trisomy 12", 
                                      ifelse(feature == "gain8q", "gain(8q)", feature))))

plotTab$feature <- factor(plotTab$feature,
                          levels=c("IGHV status", 
                                   "trisomy 12",
                                   "SF3B1", 
                                   "POT1",
                                   "ATM", 
                                   "TP53",
                                   "RAS/RAF",
                                   "gain(8q)"))


plotTab_for_Perc<- plotTab %>% 
  mutate(feature=case_when(feature=="IGHV status"~"IGHV.mutated",
                           feature=="trisomy 12"~"trisomy12_positive",
                           feature=="POT1"~"POT1_mut",
                           feature=="SF3B1"~"SF3B1_mut",
                           feature=="gain(8q)"~"gain8q_positive",
                           feature=="RAS/RAF"~"RAS_RAF_mut",
                           feature=="TP53"~"TP53_mut",
                           feature=="ATM"~"ATM_mut"))

Fig2D<-Coef_Perc_Table %>% 
  pivot_longer(IGHV.mutated:gain8q_positive, names_to = "feature", values_to = "Percentage" ) %>% 
  left_join(plotTab_for_Perc, by=c("Cluster"="cluster", "feature")) %>% 
  mutate(meanCoef=if_else(is.na(meanCoef), 0, meanCoef)) %>% 
  mutate(Cluster=factor(Cluster, levels = rev(c(1,2,3,4))),
         feature= factor(feature, 
                         levels=c("IGHV.mutated","trisomy12_positive","POT1_mut","SF3B1_mut", "gain8q_positive","RAS_RAF_mut","TP53_mut","ATM_mut"), 
                         labels=c("IGHV mutated","trisomy 12","POT1","SF3B1","gain8q","KRAS, BRAF or NRAS","TP53","ATM" ))) %>% 
  
  ggplot(aes(y=Cluster, x=feature))+
  geom_tile(aes(fill=meanCoef),color = "grey")+
   scale_fill_gradient2(low ="#003DA5",  mid ="white",  high = "#A6093D", midpoint = 0)+
   geom_text(aes( label=paste0(Percentage, "%")), size=7 )+
    t1+
   t.leg+
  theme(legend.position = "bottom", 
        legend.key.width = unit(1.7, "cm"),
         axis.text.x= element_text(size=fontsize+4, angle=45, vjust=1))+
  labs(fill="Coefficient", x="Genetic alteration")

  Fig2D

1.5.6 Figure 2E

GSEA of differentially expressed genes between clusters.
Here, for the n=49 patient samples for which we have viability data and RNA–Seq data for matching samples, we search for associations of these two data types. Using DESeq2 we regress RNA-Seq count data on the patient clusters C3, C4 (design formula ~ IGHV.status + Cluster). Genes are ranked by their test statistics and Gene Set Enrichment Analysis (GSEA) implementing the fgsea algorithm with the clusterProfiler package is applied to the ranked lists with the Hallmark pathway gene sets from the MSigDB database.

Prepare RNA count data

cytSeq <- dds_smp

#Filter out IGHV genes
cytSeq <- cytSeq[!grepl("IG_", rowData(cytSeq)$biotype),] 

#split into 3,4
cytSeq.34 <- cytSeq[,colData(cytSeq)[,"Cluster"] %in% c(3,4)]

#remove patients where IGHV is unknown
cytSeq.34 <- cytSeq.34[,colData(cytSeq.34)[,"IGHV.status"] %in% c("U","M")]

#set order of factors
cytSeq.34$Cluster <- factor(cytSeq.34$Cluster, 
                            levels = c("3","4"))

cytSeq.34$IGHV.status <- factor(cytSeq.34$IGHV.status, 
                                levels = c("U","M"))

design(cytSeq.34) <- ~ IGHV.status + Cluster

Run DESeq

cytSeq.34 <- DESeq(cytSeq.34)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 688 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing

Extract results for 3 versus 4

res.ds <- 
  results(cytSeq.34, contrast = c("Cluster", 3, 4), tidy = TRUE) %>%
  dplyr::rename(Symbol = "row") %>% 
  dplyr::arrange(pvalue) 

Add Entrez IDs and readable gene names to results table

#get ensembl ids to Entrez dataframe
ens2entrez <- 
  rowData(cytSeq.34)[c("entrezgene", "symbol")] %>% 
  as.data.frame() %>% 
  rownames_to_column("ENSEMBL") %>%  
  tibble::as_tibble()


#Join 
res.ds <- left_join(res.ds, ens2entrez, by=c("Symbol"="ENSEMBL"))

Get ranks dataframes to feed to fgsea

d <- 
  dplyr::select(res.ds, entrezgene, stat) %>%
  na.omit() %>% 
  dplyr::group_by(entrezgene) %>% 
  dplyr::summarize(stat=mean(stat)) 

## feature 1: numeric vector
geneList <- d$stat

## feature 2: named vector
names(geneList) <- as.character(d$entrezgene)

## feature 3: decreasing order
geneList <- sort(geneList, decreasing = TRUE)
hm2gene <- 
  msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, entrez_gene)
head(hm2gene)
## # A tibble: 6 x 2
##   gs_name               entrez_gene
##   <chr>                       <int>
## 1 HALLMARK_ADIPOGENESIS          19
## 2 HALLMARK_ADIPOGENESIS       11194
## 3 HALLMARK_ADIPOGENESIS       10449
## 4 HALLMARK_ADIPOGENESIS          33
## 5 HALLMARK_ADIPOGENESIS          34
## 6 HALLMARK_ADIPOGENESIS          35
#tidy up terms
hm2gene$gs_name <- gsub("HALLMARK_", "",hm2gene$gs_name)
hm2gene$gs_name <- gsub("_", " ",hm2gene$gs_name)
hm2gene$gs_name <-gsub("MTORC1 SIGNALING"   ,"MTORC1 Signaling",hm2gene$gs_name)
hm2gene$gs_name <-gsub("MYC TARGETS V1",    "MYC Targets V1"    ,hm2gene$gs_name)
hm2gene$gs_name <-gsub("OXIDATIVE PHOSPHORYLATION", "Oxidative Phosphorylation",hm2gene$gs_name)
hm2gene$gs_name <-gsub("TNFA SIGNALING VIA NFKB",   "TNFa Signaling via NFKB"   ,hm2gene$gs_name)
hm2gene$gs_name <-gsub("UNFOLDED PROTEIN RESPONSE", "Unfolded Protein Response" ,hm2gene$gs_name)   
hm2gene$gs_name <-gsub("UV RESPONSE UP",    "UV Response Up",hm2gene$gs_name)
hm2gene$gs_name <-gsub("INTERFERON GAMMA RESPONSE", "Interferon Gamma Response" ,hm2gene$gs_name)
hm2gene$gs_name <-gsub("G2M CHECKPOINT",    "G2M Checkpoint",hm2gene$gs_name)   
hm2gene$gs_name <-gsub("E2F TARGETS", "E2F Targets" ,hm2gene$gs_name)
hm2gene$gs_name <-gsub("P53 PATHWAY",   "P53 Pathway",hm2gene$gs_name)
#run GSEA
set.seed(1996)
gsea.res <- GSEA(geneList, TERM2GENE = hm2gene, by = "fgsea", seed = TRUE)

#make readable with gene names
gsea.res <- setReadable(gsea.res, org.Hs.eg.db, keyType = "ENTREZID")

Show top 10 pathways

#get dataframe of results
gsea.df <- fortify(gsea.res, 
              showCategory = 10, #how many levels to show
              split=NULL)

#order by NES
gsea.df <- dplyr::mutate(gsea.df, "NES" = eval(parse(text="NES")))
idx <- order(gsea.df[["NES"]], decreasing = TRUE)
gsea.df$Description <- factor(gsea.df$Description, levels=rev(unique(gsea.df$Description[idx])))
    
  
gsea.df$pos <- 0

  Fig2E <-
   ggplot(gsea.df,
         aes_string(x="NES", 
                    y="Description", 
                    fill="p.adjust")) +
    geom_bar(stat = "identity") +
    scale_fill_continuous(low=palreds[7],
                         high=palreds[2], 
                         name = "Adjusted p value",
                         guide=guide_colorbar(reverse=TRUE)) +
    
  ylab("Hallmark Pathway\n\n") + 
  xlab("Normalised Enrichment Score") +
  ggtitle("Upregulation of gene sets in C3 versus C4") +  
  t2 + 
  scale_size(range=c(3, 8)) + 
  geom_text(aes(label=Description, x = pos), nudge_x = 0.1, hjust = 0,  size = 6, color = "white") +
  theme(legend.position=c(0.9, 0.2),
        legend.background = element_blank(),
        legend.box.background = element_rect(size = 0.5),
        legend.title = element_text(face='bold',
                                    hjust = 1, size=11),
        legend.key = element_blank(),
        legend.text = element_text(size=12),
        axis.text.y = element_blank(), 
        axis.ticks.y = element_blank()) 
  

Fig2E

1.6 Arrange plots

design1<-"
11
23
45
"

tp<-ggplot2::theme(plot.tag=element_text(size = 30))


Figure2 <-
  wrap_elements(arrangeGrob(Fig2A[[4]])) +tp+
  wrap_elements(Fig2B) + tp+
  Fig2C + tp+
  wrap_elements(Fig2D) + tp+
  wrap_elements(Fig2E) + tp+
  
  plot_layout(design=design1, 
              heights = c(1.6, 1.2, 1.2) , 
              widths = c(1,1)) +
  
  plot_annotation(tag_levels = "A", title="Figure 2", theme = theme(title=element_text(size = 20)))

Figure2

1.7 Additional figures for Revision

1.7.1 Additional Figure of Methylation Clusters and Microenviroenmental response

patMeta_cl %>% 
  group_by(Cluster, Methylation_Cluster) %>% 
  count() %>% 
  group_by(Cluster) %>% 
  mutate(freq = n / sum(n)) %>% 
   plyr::ddply("Cluster", transform, label_ypos=cumsum(freq)) %>% 


  ggplot(aes(x=Cluster, y=freq, fill=Methylation_Cluster))+
  geom_bar(stat="identity")+
  ylab("Relative incidence of\n methylation profiles")+
  t2

1.7.2 Old Figure 2D

Plot Figure 2D:
Coefficients (feature importance) for each cluster

#average remaining coefficients in dataframe for plotting
#ie show all sig features, the cluster they predict, and the average coefficient 
plotTab <-
  coefTab %>%
  dplyr::group_by(feature, cluster) %>% 
  dplyr::summarise(meanCoef = mean(coef),
                   sdCoef = sd(coef)) %>%
  dplyr::arrange(desc(meanCoef)) %>% 
  ungroup()
## `summarise()` has grouped output by 'feature'. You can override using the
## `.groups` argument.
#update labelling 
plotTab <- plotTab %>% mutate(feature = ifelse(feature == "IGHV", "IGHV status", 
                                   ifelse(feature == "trisomy12", "trisomy 12", 
                                      ifelse(feature == "gain8q", "gain(8q)", feature))))

plotTab$feature <- factor(plotTab$feature,
                          levels=c("IGHV status", 
                                   "trisomy 12",
                                   "SF3B1", 
                                   "POT1",
                                   "ATM", 
                                   "TP53",
                                   "RAS/RAF",
                                   "gain(8q)"))


Cluster.labs <- c("Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4")
names(Cluster.labs) <- c(1, 2, 3, 4)
 

Fig2D <- 
    ggplot(plotTab, aes(x=feature, y=meanCoef))+ 
    geom_bar(stat= "identity", aes(fill = cluster))+
    geom_errorbar(aes(ymin = meanCoef - sdCoef, 
                      ymax = meanCoef + sdCoef))+
    ggtitle("") +
    coord_cartesian(ylim=c(-1.5, 1.75))+
    geom_hline(yintercept=0)+
    ylab("Depleted / Enriched\n")+
    xlab("")+
    scale_fill_manual(values=colors[1:4])+
    t1+
   t.leg+

   theme(strip.text = element_text(size = fontsize),
         axis.text.x= element_text(size=fontsize+4, angle=45, vjust=1))+
    facet_wrap(~cluster, 
               nrow=4, 
               strip.position = "right", 
               labeller = labeller(cluster = Cluster.labs))+
   guides(fill = "none")

Fig2D

1.7.3 Additional Figure of CLL_PD and Clusters of Microenvironmental response

load( "../../data/CLL_PD.RData")

CLL_PD %>% 
  left_join( patMeta_cl, by="PatientID") %>% 
  
  
  ggplot(aes(x=Cluster, y=CLL_PD,group=Cluster, fill=Cluster ))+
  geom_boxplot(outlier.shape = NA)+
  geom_beeswarm(aes(shape=CLL_PD_group ))+
  t2+
  scale_fill_manual(values=colors[1:4]) +
  guides(fill="none") +
  ylab("CLL Proliferative Drive") +
  stat_compare_means(method="t.test", comparison=list(c(1,2),c(3,4)), size=6, label.x = 1.3, label.y=3.1)+
  coord_cartesian(clip="off")

1.7.4 Correlation between CLL-PD and Stimuli response

1.7.4.1 Overview

x<-left_join(CLL_PD, df, by = "PatientID") %>% 
  group_by(PatientID,CLL_PD ) %>% 
  summarise(.groups="keep") %>% 
  ungroup() %>% 
  arrange(PatientID) %>% 
  select(CLL_PD) %>% 
  unlist()

# Stimuli<-"IL-4"

Stim_PD.corr <- vector("list", length = length(unique(df$Cytokine)))
names(Stim_PD.corr)  <- unique(df$Cytokine)

for(Stimuli in unique(df$Cytokine)){

y<-left_join(CLL_PD, df, by = "PatientID") %>% 
  filter(Cytokine==Stimuli)%>% 
  group_by(PatientID,Log ) %>% 
  summarise(.groups="keep") %>% 
  ungroup() %>% 
  arrange(PatientID) %>% 
  select(Log) %>% 
  unlist()

cor <- cor.test(x, y, conf.level = 0.95, method = c("pearson"))
    
    #convert to table
    cor.df <- unlist(cor) %>% as.data.frame()
    colnames(cor.df) <- Stimuli
    
    #add corr.test output to drug.corr dataframe
    Stim_PD.corr[[Stimuli]] <- cor.df

}

Stim_PD.corr_all <- do.call(cbind, Stim_PD.corr) %>% 
  rownames_to_column("feature")

Stim_PD.corr_all <- setNames(data.frame(t(Stim_PD.corr_all[,-1])), Stim_PD.corr_all[,1])

Stim_PD.corr_all <-rownames_to_column(Stim_PD.corr_all, var="Cytokine") %>% 
  mutate(p.value=as.numeric(p.value), estimate.cor=as.numeric(estimate.cor))


Stim_PD.corr_all %<>% 
mutate(adj.p.value=p.adjust(p.value, method = "BH"))



ggplot( Stim_PD.corr_all, aes(x=estimate.cor, y=-log10(adj.p.value))) +
      # annotate("rect", xmin = 0.0, xmax = Inf, ymin = -Inf, ymax = Inf, fill = palreds[1], alpha = 0.2)+
      # annotate("rect", xmin = 0.0, xmax = -Inf, ymin = -Inf, ymax = Inf,alpha = 0.2, fill = palblues[1])+
      geom_point(aes(alpha=0.4)) +
      theme(legend.position = "none") +  
      geom_hline(yintercept = -log10(0.05))+
      geom_label_repel(aes(label=ifelse(adj.p.value<0.05&abs(estimate.cor)>0.1,Cytokine,'')))+
      t2 +
      labs(title="Correlation between CLL-PD and response to Stimuli", x = "Pearson Correlation coefficient", y = "-log10(p-val)")+
      guides(color="none", alpha="none")

1.7.4.2 Scatter Plots

1.7.4.2.1 Scatter Plots for all stimuli in all patients
# Stimuli<-"Resiquimod"
Stimuli_list<-  arrange(Stim_PD.corr_all, estimate.cor) %>% 
    select(Cytokine) %>% 
    unlist() %>% 
    rev()

for( Stimuli in Stimuli_list){


Pearson_Correlation<-Stim_PD.corr_all %>% 
  filter(Cytokine==Stimuli) %>% 
  select(estimate.cor) %>% 
  unlist() %>% 
  round(2)

scatterplot<-left_join(CLL_PD, df, by = "PatientID") %>% 
  left_join(patMeta, by = "PatientID") %>% 
  filter(Cytokine==Stimuli,
         !is.na(IGHV.status), !is.na(trisomy12)) %>% 
  
  ggplot(aes(x=CLL_PD, y=Log))+
geom_point(aes(color=IGHV.status, shape=trisomy12))+
  geom_smooth(method = "lm", formula = 'y ~ x')+
  stat_cor(method = "pearson")+
   # geom_text(x=-1, y=2, label=paste0("Pearson Correlation Coefficient: ", Pearson_Correlation), size=4)+
  t2+
  ggtitle(paste0(Stimuli))

print(scatterplot)
}

1.7.4.2.2 Scatter Plots for all stimuli faceted by IGHV status
# Stimuli<-"Resiquimod"

  
for( Stimuli in Stimuli_list){



Pearson_Correlation<-Stim_PD.corr_all %>% 
  filter(Cytokine==Stimuli) %>% 
  select(estimate.cor) %>% 
  unlist() %>% 
  round(2)

scatterplot<-left_join(CLL_PD, df, by = "PatientID") %>% 
  left_join(patMeta, by = "PatientID") %>% 
  filter(Cytokine==Stimuli,
         !is.na(IGHV.status), !is.na(trisomy12)) %>% 
  
  ggplot(aes(x=CLL_PD, y=Log))+
geom_point(aes(color=IGHV.status, shape=trisomy12))+
  geom_smooth(method = "lm", formula = 'y ~ x')+
  stat_cor(method = "pearson")+
   # geom_text(x=-1, y=2, label=paste0("Pearson Correlation Coefficient: ", Pearson_Correlation), size=4)+
  t2+
  ggtitle(paste0(Stimuli))+
  facet_wrap(~IGHV.status)

print(scatterplot)
}

1.7.4.2.3 Scatter Plots for all stimuli faceted by Cluster
# Stimuli<-"Resiquimod"

for( Stimuli in Stimuli_list){


Pearson_Correlation<-Stim_PD.corr_all %>% 
  filter(Cytokine==Stimuli) %>% 
  select(estimate.cor) %>% 
  unlist() %>% 
  round(2)

scatterplot<-left_join(CLL_PD, df, by = "PatientID") %>% 
  left_join(patMeta_cl, by = "PatientID") %>% 
  filter(Cytokine==Stimuli,
         !is.na(IGHV.status), !is.na(trisomy12)) %>% 
  
  ggplot(aes(x=CLL_PD, y=Log))+
geom_point(aes(color=IGHV.status, shape=trisomy12))+
  geom_smooth(method = "lm", formula = 'y ~ x')+
  stat_cor(method = "pearson")+
   # geom_text(x=-1, y=2, label=paste0("Pearson Correlation Coefficient: ", Pearson_Correlation), size=4)+
  t2+
  ggtitle(paste0(Stimuli))+
  facet_wrap(~Cluster)

print(scatterplot)
}

1.7.4.3 Overall Cytokine Response and CLL-PD

left_join(CLL_PD, df, by = "PatientID") %>% 
  left_join(patMeta_cl, by = "PatientID") %>%
  group_by(PatientID, CLL_PD,IGHV.status) %>% 
  summarise(Cytokine_Overall_Response=mean((Log)), .groups="keep") %>% 
  
  ggplot(aes(x=CLL_PD, y= Cytokine_Overall_Response))+
  geom_point(aes(color=IGHV.status))+
  geom_smooth(method="lm", formula = 'y ~ x')+
  stat_cor()+
  t2

1.7.4.4 Response to selected Cytokines and CLL-PD

left_join(CLL_PD, df, by = "PatientID") %>% 
  left_join(patMeta_cl, by = "PatientID") %>%
  filter(Cytokine%in%c("Resiquimod" ,"CpG ,ODN" ,"sCD40L" ,"IL-1b" ,"soluble ,anti-IgM" ,"BAFF" ,"TGF-b1")) %>% 
  group_by(PatientID, CLL_PD,IGHV.status) %>% 
  summarise(Cytokine_Overall_Response=mean((Log)), .groups="keep") %>% 
  
  ggplot(aes(x=CLL_PD, y= Cytokine_Overall_Response))+
  geom_point(aes(color=IGHV.status))+
  geom_smooth(method="lm", formula = 'y ~ x')+
  stat_cor()+
  t2+
  ylab("Response to selected Cytokines")

1.7.4.5 Overall Stimuli response by Cluster

df %>% 
  group_by(PatientID) %>% 
  summarise(Cytokine_Overall_Response=mean((Log)), .groups="keep") %>% 
  
  left_join(patMeta_cl, by = "PatientID") %>% 
  left_join(LDT, by = "PatientID") %>% 
  
  ggplot(aes(x=Cluster, y=Cytokine_Overall_Response))+
  geom_boxplot(aes(fill=Cluster))+
  geom_beeswarm()+
  t2+
  scale_fill_manual(values=colors[1:4]) +
  guides(fill="none") +
  ylab("Overall Stimuli response") +
  stat_compare_means(method="t.test", comparison=list(c(1,2),c(3,4)), size=6, label.x = 1.3, label.y=0.45)+
  coord_cartesian(clip="off")

1.7.4.6 Correlation of LDT and Overall Stimuli response

df %>% 
  group_by(PatientID) %>% 
  summarise(Cytokine_Overall_Response=mean((Log)), .groups="keep") %>% 
  
  left_join(patMeta_cl, by = "PatientID") %>% 
  left_join(LDT, by = "PatientID") %>% 
  filter(!is.na(doubling.time)) %>% 
  
   ggplot(aes(x=Cytokine_Overall_Response, y= doubling.time))+
  geom_point(aes())+
  geom_smooth(method="lm", formula = 'y ~ x')+
  stat_cor()+
  scale_y_log10()+
  t2+
  xlab("Overall Stimuli response") +
  ylab("LDT")

1.7.5 Drug Response heatmap

Heatmap of all scaled log(viability) values, for all patient samples and drugs.

########### Viability matrix ################
#make viability matrix for cytokine treatments for patients
viab.mat <- filter(df_full, Drug!="DMSO", Cytokine=="No Cytokine", Drug_Concentration=="High") %>% 
            dplyr::select(PatientID, Log, Drug) %>% 
            tidyr::spread(Drug, Log) %>%
  as.data.frame()

#make PatientID the row names
rownames(viab.mat) <- unlist(viab.mat[,1]) # the first row will be the header
viab.mat <- viab.mat[,-1]

#Transform matrix
viab.mat <- t(viab.mat)

#keep unscaled matrix 
viab.mat.unscaled <- viab.mat

#run scaleCytResp on viability matrix
viab.mat <- scaleCytResp(viab.mat)

#apply deckel function to matrix
#Calculate dendrogram 
breaks <- c(seq(-2,2, length.out = 101))

viab.mat.lims <- deckel(viab.mat,
                            lower = dplyr::first(breaks),
                            upper = dplyr::last(breaks))

Arrange annotations for heatmap

#Sort heatmap annotations
#Select annotations from patMeta_cl
Heatmap_Annotation <- dplyr::select(patMeta_cl, 
                                    PatientID, 
                                    IGHV.status, 
                                    Methylation_Cluster,
                                    trisomy12,
                                    TP53,
                                    ATM,
                                    treatment, 
                                    gender, 
                                    Cluster) %>%
  
  #Adjust names/levels for legend
  mutate(treatment=case_when(treatment==0~"No", 
                             treatment==1~"Yes")) %>%

  mutate(gender=case_when(gender=="f"~"Female", 
                          gender=="m"~"Male")) %>%
  
  mutate(IGHV.status=case_when(IGHV.status=="U"~"Unmutated", 
                               IGHV.status=="M"~"Mutated"))%>%
  
  mutate(Methylation_Cluster=case_when(Methylation_Cluster=="LP"~"Low", 
                               Methylation_Cluster=="IP"~"Intermediate", 
                               Methylation_Cluster=="HP"~"High"))


#Rename columns for legend
colnames(Heatmap_Annotation) <- c("PatientID", 
                                  "IGHV status",
                                  "Methylation Cluster",
                                  "trisomy 12", 
                                  "TP53", 
                                  "ATM",
                                  "Pretreated", 
                                  "Sex", 
                                  "Cluster")

#make Pat IDs the rownames
Heatmap_Annotation <- as.data.frame(Heatmap_Annotation)
rownames(Heatmap_Annotation) <- unlist(Heatmap_Annotation$PatientID)

#Tidy Up Heatmap Annotation table
Heatmap_Annotation$Cluster <- as.factor(Heatmap_Annotation$Cluster)

Heatmap_Annotation <- dplyr::select(Heatmap_Annotation,-"PatientID")

Define aesthetics for heatmap and annotations

########### Set Heatmap Aesthetics ###############
#Generate red-blue divergent palette
breaks <- breaks %>% `names<-`(
     colorRampPalette(c(palblues, "white",palreds))(101))

#Specify colors of annotations 
 ann_colors = 
   list(
    "IGHV status" = c("Unmutated"=IGHV[1],
                      "Mutated"=IGHV[2]),
    "Methylation Cluster"= c("Low"=Methylation_cluster[1],
                             "Intermediate"=Methylation_cluster[2],
                             "High"=Methylation_cluster[3]),
    "Sex" = c("Female"=Sex[1],
              "Male"=Sex[2]),
    "Pretreated" = c("No"=Mutant[1],
                  "Yes"=Mutant[2]),
    "trisomy 12" = c("1"=Mutant[2],
                     "0"=Mutant[1]),
    "ATM" = c("1"=Mutant[2],
              "0"=Mutant[1]),
    "TP53" = c("1"=Mutant[2],
               "0"=Mutant[1]),
    "Cluster" = c("1"=colors[1],
                  "2"=colors[2],
                  "3"=colors[3],
                  "4"=colors[4]))

Plot Figure

#Plot heatmap

Cluster_for_Heatmap<-patMeta_cl %>% select(PatientID, Cluster) %>% 
  column_to_rownames("PatientID") %>% 
  as.data.frame()

Pigengene::pheatmap.type(t(viab.mat.unscaled), 
         annRow = Cluster_for_Heatmap,
         type="Cluster",
         doTranspose=TRUE,
         # scale = "row",
         # clustering_method = "complete", 
         cluster_rows = TRUE,
         show_colnames = FALSE,
         # annotation_col = Heatmap_Annotation,
         # annotation_colors = ann_colors,
         cellheight = 22,
         cellwidth = 4,
         breaks = breaks,
         color= names(breaks),
         border_color=NA,
         fontsize=10,
         fontsize_row=16
         )

Plot Figure

#Plot heatmap
viab.mat.tmp<-viab.mat.unscaled %>% 
  t() %>% 
  merge(Cluster_for_Heatmap,. , by=0)%>% 
  column_to_rownames("Row.names") %>% 
  filter(Cluster==1) %>% 
  select(-Cluster) %>% 
  t() 

viab.mat.tmp.ordered<-viab.mat.tmp[ c( "Fludarabine" ,"Nutlin-3a" ,"Luminespib"  ,"BAY-11-7085" ,"Pyridone-6" ,"Ralimetinib" ,"I-BET 762"  ,"Everolimus" ,"Selumetinib" ,"Idelalisib" ,      "Ibrutinib" ,"PRT062607"), ]
  
# row.names(viab.mat.tmp)

C1_drug_heatmap<-pheatmap(viab.mat.tmp.ordered, 
         annRow = Cluster_for_Heatmap,
         type="Cluster",
         doTranspose=TRUE,
         # scale = "row",
         # clustering_method = "complete", 
         cluster_rows = FALSE,
         show_colnames = FALSE,
         annotation_col = Heatmap_Annotation,
         annotation_colors = ann_colors,
         cellheight = 22,
         cellwidth = 4,
         breaks = breaks,
         color= names(breaks),
         border_color=NA,
         fontsize=10,
         fontsize_row=16, 
         legend=FALSE,
         annotation_legend = FALSE, 
         show_rownames=FALSE,
         annotation_names_col=FALSE
         )

#Plot heatmap
viab.mat.tmp<-viab.mat.unscaled %>% 
  t() %>% 
  merge(Cluster_for_Heatmap,. , by=0)%>% 
  column_to_rownames("Row.names") %>% 
  filter(Cluster==2) %>% 
  select(-Cluster) %>% 
  t() 

viab.mat.tmp.ordered<-viab.mat.tmp[ c( "Fludarabine" ,"Nutlin-3a" ,"Luminespib"  ,"BAY-11-7085" ,"Pyridone-6" ,"Ralimetinib" ,"I-BET 762"  ,"Everolimus" ,"Selumetinib" ,"Idelalisib" ,      "Ibrutinib" ,"PRT062607"), ]
  
# row.names(viab.mat.tmp)

C2_drug_heatmap<-pheatmap(viab.mat.tmp.ordered, 
         annRow = Cluster_for_Heatmap,
         type="Cluster",
         doTranspose=TRUE,
         # scale = "row",
         # clustering_method = "complete", 
         cluster_rows = FALSE,
         show_colnames = FALSE,
         annotation_col = Heatmap_Annotation,
         annotation_colors = ann_colors,
         cellheight = 22,
         cellwidth = 4,
         breaks = breaks,
         color= names(breaks),
         border_color=NA,
         fontsize=10,
         fontsize_row=16, 
         legend=FALSE,
         annotation_legend = FALSE, 
         show_rownames=FALSE,
         annotation_names_col=FALSE
         )

#Plot heatmap
viab.mat.tmp<-viab.mat.unscaled %>% 
  t() %>% 
  merge(Cluster_for_Heatmap,. , by=0)%>% 
  column_to_rownames("Row.names") %>% 
  filter(Cluster==3) %>% 
  select(-Cluster) %>% 
  t() 

viab.mat.tmp.ordered<-viab.mat.tmp[ c( "Fludarabine" ,"Nutlin-3a" ,"Luminespib"  ,"BAY-11-7085" ,"Pyridone-6" ,"Ralimetinib" ,"I-BET 762"  ,"Everolimus" ,"Selumetinib" ,"Idelalisib" ,      "Ibrutinib" ,"PRT062607"), ]
  
# row.names(viab.mat.tmp)

C3_drug_heatmap<-pheatmap(viab.mat.tmp.ordered, 
         annRow = Cluster_for_Heatmap,
         type="Cluster",
         doTranspose=TRUE,
         # scale = "row",
         # clustering_method = "complete", 
         cluster_rows = FALSE,
         show_colnames = FALSE,
         annotation_col = Heatmap_Annotation,
         annotation_colors = ann_colors,
         cellheight = 22,
         cellwidth = 4,
         breaks = breaks,
         color= names(breaks),
         border_color=NA,
         fontsize=10,
         fontsize_row=16, 
         legend=FALSE,
         annotation_legend = FALSE, 
         show_rownames=FALSE,
         annotation_names_col=FALSE
         )

#Plot heatmap
viab.mat.tmp<-viab.mat.unscaled %>% 
  t() %>% 
  merge(Cluster_for_Heatmap,. , by=0)%>% 
  column_to_rownames("Row.names") %>% 
  filter(Cluster==4) %>% 
  select(-Cluster) %>% 
  t() 

viab.mat.tmp.ordered<-viab.mat.tmp[ c( "Fludarabine" ,"Nutlin-3a" ,"Luminespib"  ,"BAY-11-7085" ,"Pyridone-6" ,"Ralimetinib" ,"I-BET 762"  ,"Everolimus" ,"Selumetinib" ,"Idelalisib" ,      "Ibrutinib" ,"PRT062607"), ]
  
# row.names(viab.mat.tmp)

C4_drug_heatmap<-pheatmap(viab.mat.tmp.ordered, 
         annRow = Cluster_for_Heatmap,
         type="Cluster",
         doTranspose=TRUE,
         # scale = "row",
         # clustering_method = "complete", 
         cluster_rows = FALSE,
         show_colnames = FALSE,
         annotation_col = Heatmap_Annotation,
         annotation_colors = ann_colors,
         cellheight = 22,
         cellwidth = 4,
         breaks = breaks,
         color= names(breaks),
         border_color=NA,
         fontsize=10,
         fontsize_row=16
         )

wrap_elements(arrangeGrob(C1_drug_heatmap[[4]]))+
wrap_elements(arrangeGrob(C2_drug_heatmap[[4]]))+
wrap_elements(arrangeGrob(C3_drug_heatmap[[4]]))+
wrap_elements(arrangeGrob(C4_drug_heatmap[[4]]))+
  plot_layout(nrow=1, widths = c(0.93,0.13,0.52,1.03))

## Appendix

Sys.info()
##           sysname           release           version          nodename 
##         "Windows"          "10 x64"     "build 19042" "DESKTOP-BF7AU2H" 
##           machine             login              user    effective_user 
##          "x86-64"      "PeterBruch"      "PeterBruch"      "PeterBruch"
sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=German_Germany.1252  LC_CTYPE=German_Germany.1252   
## [3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C                   
## [5] LC_TIME=German_Germany.1252    
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1               stringr_1.4.0              
##  [3] purrr_0.3.4                 readr_2.1.2                
##  [5] tibble_3.1.6                tidyverse_1.3.1            
##  [7] dplyr_1.0.7                 msigdbr_7.4.1              
##  [9] org.Hs.eg.db_3.11.4         AnnotationDbi_1.50.3       
## [11] clusterProfiler_3.16.1      ConsensusClusterPlus_1.52.0
## [13] glmnet_4.1-3                Matrix_1.3-2               
## [15] survminer_0.4.9             ggpubr_0.4.0.999           
## [17] survival_3.2-13             DESeq2_1.28.1              
## [19] SummarizedExperiment_1.18.2 DelayedArray_0.14.1        
## [21] matrixStats_0.61.0          Biobase_2.48.0             
## [23] GenomicRanges_1.40.0        GenomeInfoDb_1.24.2        
## [25] IRanges_2.22.2              S4Vectors_0.26.1           
## [27] BiocGenerics_0.34.0         gridExtra_2.3              
## [29] ggfortify_0.4.14            pheatmap_1.0.12            
## [31] magrittr_2.0.2              ggbeeswarm_0.6.0           
## [33] tidyr_1.2.0                 patchwork_1.1.1            
## [35] ggrepel_0.9.1               ggplot2_3.3.5              
## [37] RColorBrewer_1.1-2          BiocStyle_2.16.1           
## 
## loaded via a namespace (and not attached):
##   [1] bit64_4.0.5            knitr_1.37             data.table_1.14.2     
##   [4] rpart_4.1-15           RCurl_1.98-1.5         doParallel_1.0.16     
##   [7] generics_0.1.2         snow_0.4-4             preprocessCore_1.50.0 
##  [10] cowplot_1.1.1          RSQLite_2.2.9          europepmc_0.4.1       
##  [13] bit_4.0.4              tzdb_0.2.0             enrichplot_1.8.1      
##  [16] xml2_1.3.3             lubridate_1.8.0        assertthat_0.2.1      
##  [19] viridis_0.6.2          xfun_0.29              hms_1.1.1             
##  [22] jquerylib_0.1.4        babelgene_21.4         evaluate_0.14         
##  [25] fansi_1.0.2            progress_1.2.2         dbplyr_2.1.1          
##  [28] readxl_1.3.1           htmlwidgets_1.5.4      Rgraphviz_2.32.0      
##  [31] km.ci_0.5-2            igraph_1.2.11          DBI_1.1.2             
##  [34] geneplotter_1.66.0     ellipsis_0.3.2         backports_1.4.1       
##  [37] bookdown_0.24          markdown_1.1           annotate_1.66.0       
##  [40] libcoin_1.0-9          vctrs_0.3.8            abind_1.4-5           
##  [43] cachem_1.0.6           withr_2.4.3            ggforce_0.3.3         
##  [46] triebeard_0.3.0        bnlearn_4.7            checkmate_2.0.0       
##  [49] prettyunits_1.1.1      cluster_2.1.1          DOSE_3.14.0           
##  [52] mltools_0.3.5          crayon_1.4.2           genefilter_1.70.0     
##  [55] pkgconfig_2.0.3        labeling_0.4.2         tweenr_1.0.2          
##  [58] nlme_3.1-155           vipor_0.4.5            nnet_7.3-15           
##  [61] rlang_1.0.1            lifecycle_1.0.1        downloader_0.4        
##  [64] modelr_0.1.8           cellranger_1.1.0       Cubist_0.4.0          
##  [67] polyclip_1.10-0        partykit_1.2-15        graph_1.66.0          
##  [70] urltools_1.7.3         KMsurv_0.1-5           carData_3.0-5         
##  [73] zoo_1.8-9              base64enc_0.1-3        reprex_2.0.1          
##  [76] beeswarm_0.4.0         ggridges_0.5.3         png_0.1-7             
##  [79] viridisLite_0.4.0      bitops_1.0-7           blob_1.2.2            
##  [82] shape_1.4.6            qvalue_2.20.0          jpeg_0.1-9            
##  [85] rstatix_0.7.0          gridGraphics_0.5-1     ggsignif_0.6.3        
##  [88] scales_1.1.1           memoise_2.0.1          plyr_1.8.6            
##  [91] gdata_2.18.0           zlibbioc_1.34.0        compiler_4.0.5        
##  [94] scatterpie_0.1.7       cli_3.1.1              XVector_0.28.0        
##  [97] htmlTable_2.4.0        Formula_1.2-4          MASS_7.3-55           
## [100] mgcv_1.8-38            WGCNA_1.70-3           tidyselect_1.1.1      
## [103] stringi_1.7.6          highr_0.9              yaml_2.2.2            
## [106] GOSemSim_2.14.2        locfit_1.5-9.4         latticeExtra_0.6-29   
## [109] survMisc_0.5.5         Pigengene_1.14.0       grid_4.0.5            
## [112] sass_0.4.0             fastmatch_1.1-3        tools_4.0.5           
## [115] rstudioapi_0.13        foreign_0.8-81         foreach_1.5.2         
## [118] inum_1.0-4             C50_0.1.6              farver_2.1.0          
## [121] ggraph_2.0.5           digest_0.6.29          rvcheck_0.2.1         
## [124] BiocManager_1.30.16    ggtext_0.1.1           Rcpp_1.0.8            
## [127] gridtext_0.1.4         car_3.0-12             broom_0.7.12          
## [130] httr_1.4.2             colorspace_2.0-2       rvest_1.0.2           
## [133] XML_3.99-0.8           fs_1.5.2               splines_4.0.5         
## [136] yulab.utils_0.0.4      graphlayouts_0.8.0     ggplotify_0.1.0       
## [139] xtable_1.8-4           jsonlite_1.7.3         dynamicTreeCut_1.63-1 
## [142] tidygraph_1.2.0        ggfun_0.0.5            R6_2.5.1              
## [145] Hmisc_4.6-0            pillar_1.7.0           htmltools_0.5.2       
## [148] glue_1.6.1             fastmap_1.1.0          BiocParallel_1.22.0   
## [151] codetools_0.2-18       fgsea_1.14.0           mvtnorm_1.1-3         
## [154] utf8_1.2.2             lattice_0.20-41        bslib_0.3.1           
## [157] gtools_3.9.2           magick_2.7.3           GO.db_3.11.4          
## [160] rmarkdown_2.11         munsell_0.5.0          fastcluster_1.2.3     
## [163] DO.db_2.9              GenomeInfoDbData_1.2.3 iterators_1.0.14      
## [166] impute_1.62.0          haven_2.4.3            reshape2_1.4.4        
## [169] gtable_0.3.0